package com.chaosj.webmagic.process;

import cn.hutool.core.text.StrFormatter;
import com.chaosj.webmagic.dto.BlogHostEnums;
import com.chaosj.webmagic.md.StorageApi;
import org.apache.commons.lang3.StringUtils;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;


/**
 * @className: WeixinScrapeProcess
 * @description: Page processor for WeChat ({@link BlogHostEnums#WECHAT}) article pages. Supplies
 *               the XPath selectors for title, author and content, and additionally hands an
 *               embedded {@code <mpvoice>} audio clip, when present, to {@link StorageApi}.
 * @author: caoyangjie
 * @date: 2022/1/4
 **/
public class WeixinScrapeProcess extends BasePageProcessor implements PageProcessor {

    /** Download URL template for an embedded voice clip; {@code {}} is replaced by the media id. */
    private static final String VOICE_URL_TEMPLATE = "https://res.wx.qq.com/voice/getvoice?mediaid={}";

    /** XPath yielding the {@code name} attribute of an {@code <mpvoice>} element. */
    private static final String VOICE_NAME_XPATH = "//mpvoice/@name";

    /** XPath yielding the {@code voice_encode_fileid} attribute of an {@code <mpvoice>} element. */
    private static final String VOICE_FILE_ID_XPATH = "//mpvoice/@voice_encode_fileid";

    /**
     * @return XPath selecting the text of the {@code <h1>} whose id is {@code activity-name}
     */
    @Override
    protected String titleXPath() {
        // The id literal is quoted: an unquoted value is not a string literal in standard
        // XPath, and quoting keeps this selector consistent with the author/content ones.
        return "//h1[@id='activity-name']/text()";
    }

    /**
     * @return XPath selecting the text of the {@code <a>} whose id is {@code js_name}
     */
    @Override
    protected String authorXPath() {
        return "//a[@id='js_name']/text()";
    }

    /**
     * @return XPath selecting the {@code <div>} whose id is {@code js_content}
     */
    @Override
    protected String contentXPath() {
        return "//div[@id='js_content']";
    }

    /**
     * Builds the crawl site configuration for this processor.
     *
     * @return a new {@link Site} whose domain is set from {@code BlogHostEnums.WECHAT.getUrl()}
     */
    @Override
    public Site getSite() {
        return Site.me().setDomain(BlogHostEnums.WECHAT.getUrl());
    }

    /**
     * Extra per-page step: looks up an {@code <mpvoice>} element on the page and, when both its
     * {@code name} and {@code voice_encode_fileid} attributes are non-blank, passes the derived
     * download URL and the name to {@link StorageApi}.
     *
     * @param page  the page being processed
     * @param error not used by this implementation
     *              (NOTE(review): presumably an error flag supplied by the base class - confirm)
     */
    @Override
    protected void doMoreProcess(Page page, Boolean error) {
        String voiceName = page.getHtml().xpath(VOICE_NAME_XPATH).get();
        String voiceFileId = page.getHtml().xpath(VOICE_FILE_ID_XPATH).get();

        // Nothing to store unless both the display name and the media id were found.
        if (StringUtils.isBlank(voiceName) || StringUtils.isBlank(voiceFileId)) {
            return;
        }

        String voiceUrl = StrFormatter.format(VOICE_URL_TEMPLATE, voiceFileId);
        // The audio URL goes through saveVideo, the same call the original code used.
        StorageApi.saveVideo(voiceUrl, voiceName);
    }
}
